*************************************************************   
* do-file to create variables from NELS:1988				*
* sample for part-time and not year-round full-time workers *
* fo 18.02.2016												*
*************************************************************
* Start the text log (an existing log file of that name is replaced) and
* switch off output paging.
log using "Output\variables_pt", replace text
set more off

*** open dataset ***
* Only the variables named in this list are read from the NELS file.
local nelsvars ///
	F2SES1   F22XRSTD F22XMSTD F22XSSTD F2RHENG2 F2RHMAG2 F2RHSCG2 ///
	PPOSTEX1 F2S40C   F4BJNUM  F4BJCNT  F4BWFOR  F4BXOCCD F4BXINCD ///
	F4BJHPW  F4BAHPW  F4BWKSWK F4BLHPW  F4BJAUT  F4CTRNQ  F4EDGR1  ///
	F4EPGR1  F4ESCT1  F4ED1    F4EMJ1D  F4EDGR2  F4EPGR2  F4ESCT2  ///
	F4ED2    F4EMJ2D  F4EDGR3  F4ESCT3  F4ED3    F4EMJ3D  F4EGRD   ///
	F4GMRS   F4GNCH   F4HI99   F4SEX    F4HHDG   STU_ID   F4QWT    ///
	F4BYPNWT F4PNLWT  F4F1PNWT F4F2PNWT F4CXTWT  F4PAQWT  F4TRSCWT ///
	F4QWT92G F4GNDP   F4GNDPC  F4AACTF  F4AACTP  F4RACE   F4TYPEDG
use `nelsvars' using "Data\NELS_88_00_BYF4STU_v1_0", clear

* declare F4QWT as the probability weight for all svy: commands
svyset [pw=F4QWT]
* generic no/yes value label
label define noyes 0 "no" 1 "yes"

*** dependent variable: annual income ***
* negative codes of F4HI99 are set to missing before the log is taken
recode F4HI99 (-1 -2 -3 -7 = .), generate(inc_year)
generate ln_inc_year = ln(inc_year)

*** independent variables ***
* gender: frau is 1 for F4SEX==2 and 0 for F4SEX==1; code -1 becomes missing *
recode F4SEX (1 = 0) (2 = 1) (-1 = .), generate(frau)
label variable frau "gender"
label define frau 0 "male" 1 "female"
label values frau frau
* observations without gender information are removed
drop if frau == .
* mann is the mirror image of frau
recode frau (1 = 0) (0 = 1), generate(mann)

* ses of family of origin *
* NOTE(review): 99.998 is a non-integer code - confirm that it is matched given the storage type of F2SES1
recode F2SES1 (99.998 = .), generate(parses)

* race of respondent *
* F4RACE code -9 is set to missing and category 5 is merged into category 4.
recode F4RACE(-9=.)(5=4), gen(race)
lab def race 1 "Asian American" 2 "Latino" 3 "Black" 4 "White or other"
lab val race race		/* attach the value label to the variable it describes */

* importance of having lots of money * 
* F2S40C codes 6 to 9 are set to missing; all other codes are kept as they are.
recode F2S40C (9 8 7 6=.), gen(zigeld)
lab var zigeld "Ziel sehr gut verdienen"

* family characteristics *
* The three marital-status dummies are all derived from F4GMRS; codes -7, -2
* and -1 are set to missing in each of them.

* single (F4GMRS code 1) *
recode F4GMRS (-7 -2 -1=.)(1=1)(2 3 4 5 6=0), gen(single)
lab var single "single"

* married (F4GMRS codes 2 and 6) *
recode F4GMRS (-7 -2 -1=.)(2 6=1)(1 3 4 5=0), gen(married)
lab var married "married or marr.-like rel."

* divorced (F4GMRS codes 3, 4 and 5) *
recode F4GMRS (-7 -2 -1=.)(3 4 5=1)(1 2 6=0), gen(divorced)
lab var divorced "divorced/separated/widowed"	/* label goes on divorced, so the label of married stays intact */

* parent *
* Start from the number of dependants (F4GNDP): any positive count gives
* parent = 1, zero gives parent = 0, codes -7, -2 and -1 give missing.
recode F4GNDP (-7 -2 -1=.)(1/9=1)(0=0), gen(parent)
* The check has to use the number of dependants who are children (F4GNDPC);
* F4GNDP itself can never be 0 when parent is 1.
replace parent = 0 if parent==1 & F4GNDPC==0		/*not a parent if none of the dependants are children */ 

* single parent *
* 1 only when both parent and single are 1; missing when either input is missing.
gen parent_single = 0
replace parent_single = 1 if parent==1 & single==1
replace parent_single = . if parent==. | single==.
lab var parent_single "single parent"

* number of children *
* 0 for non-parents, F4GNDPC for parents with a positive count; everything
* else stays missing.
gen nrchildren = .
replace nrchildren = 0 if parent==0
replace nrchildren = F4GNDPC if parent==1 & F4GNDPC > 0
lab var nrchildren "number of children"

	
* hours worked per week *
* negative codes of F4BLHPW are set to missing
recode F4BLHPW (-1 -2 -3 -8 = .), generate(arbzt_woch)

* sector: privat is 1 for F4BWFOR==1 and 0 for codes 2 to 6 *
recode F4BWFOR (1 = 1) (2/6 = 0) (-1 -2 -3 -9 = .), generate(privat)

* industry: negative codes to missing; 2 joins 1, 11 joins 10, 15 and 21 join 14 *
recode F4BXINCD (2 = 1) (11 = 10) (15 21 = 14) (-1 -2 -3 -6 -9 = .), generate(branche)

* value labels for the categories that remain after merging
label define branche 1 "Acriculture, forestry, fisheries, mining"
label define branche 3 "Construction and allied", add
label define branche 4 "Manufacturing: Durable goods", add
label define branche 5 "Manufacturing: Nondurable goods", add
label define branche 6 "Utilities", add
label define branche 7 "Wholesale distribution", add
label define branche 8 "Retail trades", add
label define branche 9 "Finance, insurance, real estate", add
label define branche 10 "Business, personal services", add
label define branche 12 "Entertainment, recreation", add
label define branche 13 "Professional services", add
label define branche 14 "Public administration, safety, military", add
label define branche 16 "Health care", add
label define branche 17 "Communications", add
label define branche 18 "Transportation", add
label define branche 19 "Hospitality", add
label define branche 20 "Education", add
label values branche branche

* occupation *
* Negative codes and codes 41 and 42 become missing; several small categories
* are folded into a neighbouring one (e.g. 3 into 2, 11 and 12 into 10).
recode F4BXOCCD (3 = 2) (5 = 8) (7 = 6) (11 12 = 10) (14 = 13) (20 = 19) ///
	(23 = 21) (33 = 32) (41 42 = .) (-1 -2 -3 -6 = .), generate(beruf)

* value labels for the categories that remain after merging
label define beruf 1 "Secretary, receptionist"
label define beruf 2 "Cashier, teller, clerk, data entry", add
label define beruf 4 "Other clerical", add
label define beruf 6 "Personal service, cook, chef, baker", add
label define beruf 8 "Farmer, forester, farm laborer, nonfarm laborer", add
label define beruf 9 "Mechanic, repairer, service technician", add
label define beruf 10 "Craftsman, skilled operative", add
label define beruf 13 "Protective service, criminal justice, military", add
label define beruf 15 "Business and financial support services", add
label define beruf 16 "Financial service professional", add
label define beruf 17 "Sales, purchasing", add
label define beruf 18 "Customer service", add
label define beruf 19 "Legal support", add
label define beruf 21 "Medical practice professional, services", add
label define beruf 22 "Medical licensed professional", add
label define beruf 24 "Educators (K-12 teachers)", add
label define beruf 25 "Educators, instructors (non-K-12)", add
label define beruf 26 "Human service professional", add
label define beruf 27 "Engineer, architect, software engineer", add
label define beruf 28 "Scientist, statistician professional", add
label define beruf 29 "Research assistant, lab technician", add
label define beruf 30 "Technical, professional worker", add
label define beruf 31 "Computer systems, related professional", add
label define beruf 32 "Computer programmer, other computer", add
label define beruf 34 "Editor, writer, reporter", add
label define beruf 35 "Performer, artist", add
label define beruf 36 "Manager, executive", add
label define beruf 37 "Manager, midlevel", add
label define beruf 38 "Manager, supervisory, office", add
label define beruf 39 "Health, recreational services", add
label values beruf beruf
* prefix every label text with its numeric code
numlabel beruf, add

* job training *
recode F4CTRNQ (-1 -2 -3 -7 = .), generate(jobtrain)

* job autonomy *
recode F4BJAUT (-1 -2 -3 = .), generate(autonom)

* standardized test scores (reading, math, science) and 12th grade grades
* (english, math, science): each source variable is copied into a new variable
* with the codes 99.98 and 99.99 set to missing.
* SAT scores are only available in the restricted use file.
* NOTE(review): 99.98 and 99.99 are non-integer codes - confirm that they are matched given the storage type of the source variables
local rawvars F22XRSTD   F22XMSTD   F22XSSTD   F2RHENG2  F2RHMAG2   F2RHSCG2
local newvars score_read score_math score_scie grade_eng grade_math grade_scie
forvalues k = 1/6 {
	local raw : word `k' of `rawvars'
	local new : word `k' of `newvars'
	recode `raw' (99.98 99.99 = .), generate(`new')
}

* undergraduate GPA *
* category codes 1 to 7 of F4EGRD are replaced by grade-point values;
* codes 8, 9 and the negative codes become missing
recode F4EGRD (1 = 3.875) (2 = 3.5) (3 = 3.0) (4 = 2.5) (5 = 2.0) (6 = 1.5) (7 = 0.625) ///
	(8 9 -7 -3 -2 -1 = .), generate(ugrad_gpa)

* college major: map the F4EMJ1D major codes to two-digit codes (cipcode) *
* Negative codes, 320 and 900 become missing.
* NOTE(review): 192 is listed on its own and again inside 192/196; the rule
* order is left exactly as it was so that precedence is unchanged.
recode F4EMJ1D ///
	(10 = 1) (20 = 2) (30 31 = 3) (40 = 4) (50/53 = 5) ///
	(60/80 = 52) (90 91 = 9) (100 = 10) (110/112 = 11) (120/121 = 12) ///
	(130/135 = 13) (140/144 = 14) (150 = 15) (160/162 = 16) (170/190 = 51) ///
	(191 = 48) (192 = 19) (192/196 = 51) (200 201 = 19) (220 221 = 22) ///
	(230/232 = 23) (240 = 24) (250 = 25) (260/263 = 26) (270 271 = 27) ///
	(280 = 43) (300 = 45) (301/304 = 30) (310 = 31) (320 = .) ///
	(380 381 = 38) (390 = 39) (400/403 = 40) (420 = 42) (430 = 43) ///
	(440 441 = 44) (450/457 = 45) (460 = 46) (470/472 = 47) (480 481 = 48) ///
	(490 491 = 49) (500/505 = 50) (-1 -2 -3 -4 -6 -7 900 = .), ///
	generate(cipcode)

* broad field-of-study group derived from cipcode, one line per target group *
recode cipcode ///
	(8 = 1) (12 = 1) (22 = 1) (43 = 1) (52 = 1) ///
	(1/4 = 2) (10 11 = 2) (14 15 = 2) (26/30 = 2) (40 41 = 2) (46/49 = 2) (51 = 2) ///
	(5 = 3) (9 = 3) (16 = 3) (19 20 = 3) (23/25 = 3) (31 = 3) (38 39 = 3) (42 = 3) (44 45 = 3) (50 = 3) ///
	(13 = 4), ///
	generate(fachgruppe)

label variable fachgruppe "Fachgruppe"
label define fachgruppe 1 "Business, Econ.Sci., Law" 2 "Math, Nat.Sci. Engin." 3 "Soc.Sci, Humanities" 4 "Education"
label values fachgruppe fachgruppe

* store the NELS data so that the IPEDS figures can be merged onto them
save "Data\NELS.dta", replace

*** compute percentage female of major from IPEDS ***
* load IPEDS dataset (replaces the data in memory) *
insheet using "Data\c2001_a2dig_data_stata.csv", comma clear
* the columns crace01 through crace14 are not needed
drop crace01-crace14
* use only degree types used in BZ: award levels 1, 2, 3, 4 and 11 are removed
drop if inlist(awlevel, 1, 2, 3, 4, 11)

* consolidate CIP code to 2 digits and label values *
* A few codes are first mapped onto their two-digit family (80000 goes to
* 520000), then the code is divided by 10000.
recode cipcode (220101 = 220000) (390000/391000 = 390000) (510000/512500 = 510000) (80000 = 520000)
replace cipcode = cipcode/10000

label define label_cipcode ///
	 1 "01 - Agricultural Business and Production" ///
	 2 "02 - Agricultural Sciences" ///
	 3 "03 - Conservation and Renewable Natural Resources" ///
	 4 "04 - Architecture and Related Programs" ///
	 5 "05 - Area, Ethnic and Cultural Studies" ///
	 8 "08 - Marketing Operations/Marketing and Distribution" ///
	 9 "09 - Communications" ///
	10 "10 - Communications Technologies" ///
	11 "11 - Computer and Information Sciences" ///
	12 "12 - Personal and Miscellaneous Services" ///
	13 "13 - Education" ///
	14 "14 - Engineering" ///
	15 "15 - Engineering-Related Technologies" ///
	16 "16 - Foreign Languages and Literatures" ///
	19 "19 - Home Economics, General" ///
	20 "20 - Vocational Home Economics" ///
	22 "22 - Law and Legal Studies" ///
	23 "23 - English Language and Literature/Letters" ///
	24 "24 - Liberal Arts and Sciences, General Studies & Humanities" ///
	25 "25 - Library Science" ///
	26 "26 - Biological Sciences/Life Sciences" ///
	27 "27 - Mathematics" ///
	29 "29 - Military Technologies" ///
	30 "30 - Multi/Interdisciplinary Studies" ///
	31 "31 - Parks, Recreation, Leisure and Fitness" ///
	38 "38 - Philosophy and Religion" ///
	39 "39 - Theological Studies and Religious Vocations" ///
	40 "40 - Physical Sciences" ///
	41 "41 - Science Technologies" ///
	42 "42 - Psychology" ///
	43 "43 - Protective Services" ///
	44 "44 - Public Administration and Services" ///
	45 "45 - Social Sciences and History" ///
	46 "46 - Construction Trades" ///
	47 "47 - Mechanics and Repairers" ///
	48 "48 - Precision Production Trades" ///
	49 "49 - Transportation and Material Moving Workers" ///
	50 "50 - Visual and Performing Arts" ///
	51 "51 - Health Professions and Related Sciences" ///
	52 "52 - Business Management & Admin. Services" ///
	95 "95 - Undesignated field of study" ///
	99 "99 - Award Level Total"
label values cipcode label_cipcode

* compute proportion female *
* totals of crace16 (numerator) and crace15 within each two-digit code
by cipcode, sort: egen nrfemale = total(crace16)
by cipcode, sort: egen nrmale = total(crace15)
generate fa_frau = nrfemale / (nrfemale + nrmale)

* collapse to major level and merge into NELS *
* one row per code is enough because fa_frau is constant within cipcode
by cipcode, sort: drop if _n > 1
keep cipcode fa_frau
merge 1:m cipcode using "Data\NELS.dta"
* drop majors not in NELS (rows that exist only in the IPEDS data)
drop if _merge == 1
	
* highest degree: three dummies derived from F4TYPEDG; codes -3 and -9 become missing *
* degree_ba is 1 for codes 4, 6, 7 and 8
recode F4TYPEDG (4 6 7 8 = 1) (1 2 3 5 9 10 = 0) (-3 -9 = .), generate(degree_ba)
* degree_ma is 1 for code 9
recode F4TYPEDG (9 = 1) (1 2 3 4 5 6 7 8 10 = 0) (-3 -9 = .), generate(degree_ma)
* degree_phd is 1 for code 10
recode F4TYPEDG (10 = 1) (1 2 3 4 5 6 7 8 9 = 0) (-3 -9 = .), generate(degree_phd)

* institutional selectivity *
* only available from PETS file (restricted) *

* employment: 1 when either activity flag (F4AACTF, F4AACTP) equals 1, otherwise 0 *
generate employ = (F4AACTF == 1 | F4AACTP == 1)

*********************
*** select sample ***
*********************
* The upper income bound is read from the global macro censor, which is not
* set anywhere in this do-file. Stop with a clear message before any
* observation is dropped if it is missing.
if "$censor" == "" {
	display as error "global macro censor (upper bound for inc_year) must be set before running this do-file"
	exit 198
}

drop if F4EMJ1D==-3 | F4EMJ1D==900 | F4TYPEDG == -9 | F4TYPEDG == 1 | F4TYPEDG == 2 | F4TYPEDG == 3 | F4TYPEDG == 5 	/* drop if major or highest degree does not apply or is less than B.A. (as in B.Z.) */
* NOTE(review): a missing arbzt_woch also satisfies >=10, so respondents with unknown hours are kept here
keep if employ==1 & arbzt_woch>=10						/* this time keep part-time and non-year-round workers */
drop if inc_year == .									/* delete obs with missing income data, as in BZ */
drop if inc_year < 2000 								/* as in BZ */
drop if inc_year > $censor	 										/* as in BZ */
drop if frau == . | parent == . | married == . 						/* don't want to impute that, 1 obs dropped */

*********************
* overview of the missing values that remain in the analysis variables
misstable summarize race inc_year fachgruppe ///
	fa_frau score_read score_math score_scie grade_eng grade_math grade_scie ///
	parses zigeld single married parent arbzt_woch privat ///
	beruf branche autonom jobtrain nrchildren

************************************
*** imputation of missing values ***
************************************
* number of imputations, used by the mi commands further down
global impnr = 30
svyset [pw=F4QWT]
sort STU_ID

* impute field of study *
* assign mode field of study based on occupation (19 obs) *
* fieldmiss flags the observations whose field group is filled in here
generate fieldmiss = (fachgruppe == .)
tabulate beruf if fachgruppe == .

* For each listed occupation: take the modal field group among respondents
* with a known field (the smallest value when there are ties) and assign it
* to the respondents in that occupation whose field is missing.
quietly foreach occ in 1 13 15 16 17 21 22 24 25 26 29 31 34 38 {
	egen mode = mode(fachgruppe) if fachgruppe != . & beruf == `occ', minmode
	mean mode
	replace fachgruppe = _b[mode] if fachgruppe == . & beruf == `occ'
	drop mode
}

* use prop female from broad field of study for the imputed cases *
* the weighted mean of frau within each field group replaces fa_frau
forvalues grp = 1/4 {
	svy: mean frau if fachgruppe == `grp'
	replace fa_frau = _b[frau] if fieldmiss == 1 & fachgruppe == `grp'
}
	
* impute occupation: difficult for mlogit because many very small categories, hence:
* assign to one of the larger occupations with probability equal to the relative
* frequency of that occupation among respondents with the same field of study.
* occmiss flags the observations whose occupation is filled in here
generate occmiss = (beruf == .)

* for fachgr_wiwi *
* candidate occupations, in the order in which svy: tab reports them
local occs 13 15 16 17 31 37 38
local occcsv : subinstr local occs " " ", ", all
svy: tab beruf if fachgruppe==1 & inlist(beruf, `occcsv')	/* get relative frequencies */
mat fgr = e(b)
set seed 703658
gen random1 = runiform()
* walk through the cumulative shares: a draw that falls into the j-th
* interval assigns the j-th occupation of the list
tempname cumlo cumhi
scalar `cumlo' = 0
local j = 0
foreach occ of local occs {
	local ++j
	scalar `cumhi' = scalar(`cumlo') + fgr[1,`j']
	replace beruf = `occ' if beruf ==. & fachgruppe==1 & random1 >= scalar(`cumlo') & random1 < scalar(`cumhi')
	scalar `cumlo' = scalar(`cumhi')
}
drop random1

* for fachgr_natur *
* candidate occupations, in the order in which svy: tab reports them
local occs 13 15 16 17 21 22 24 27 28 29 30 31 32 37 38
local occcsv : subinstr local occs " " ", ", all
svy: tab beruf if fachgruppe==2 & inlist(beruf, `occcsv')		/* get relative frequencies */
mat fgr = e(b)
set seed 19477
gen random1 = runiform()
* walk through the cumulative shares: a draw that falls into the j-th
* interval assigns the j-th occupation of the list
tempname cumlo cumhi
scalar `cumlo' = 0
local j = 0
foreach occ of local occs {
	local ++j
	scalar `cumhi' = scalar(`cumlo') + fgr[1,`j']
	replace beruf = `occ' if beruf ==. & fachgruppe==2 & random1 >= scalar(`cumlo') & random1 < scalar(`cumhi')
	scalar `cumlo' = scalar(`cumhi')
}
drop random1

* for fachgr_geist *
* candidate occupations, in the order in which the tabulation reports them
local occs 13 15 16 17 19 24 25 26 31 34 35 37 38
local occcsv : subinstr local occs " " ", ", all
svy: qui tab beruf if fachgruppe==3 & inlist(beruf, `occcsv')	/* get relative frequencies */
mat fgr = e(b)
set seed 3035
gen random1 = runiform()
* walk through the cumulative shares: a draw that falls into the j-th
* interval assigns the j-th occupation of the list
tempname cumlo cumhi
scalar `cumlo' = 0
local j = 0
foreach occ of local occs {
	local ++j
	scalar `cumhi' = scalar(`cumlo') + fgr[1,`j']
	replace beruf = `occ' if beruf ==. & fachgruppe==3 & random1 >= scalar(`cumlo') & random1 < scalar(`cumhi')
	scalar `cumlo' = scalar(`cumhi')
}
drop random1

* for fachgr_erzieh *
* only two candidate occupations here: 24 and 25
local occs 24 25
local occcsv : subinstr local occs " " ", ", all
svy: qui tab beruf if fachgruppe==4 & inlist(beruf, `occcsv')	/* get relative frequencies */
mat fgr = e(b)
set seed 34580
gen random1 = runiform()
* a draw below the first share assigns 24, a draw in the next interval assigns 25
tempname cumlo cumhi
scalar `cumlo' = 0
local j = 0
foreach occ of local occs {
	local ++j
	scalar `cumhi' = scalar(`cumlo') + fgr[1,`j']
	replace beruf = `occ' if beruf ==. & fachgruppe==4 & random1 >= scalar(`cumlo') & random1 < scalar(`cumhi')
	scalar `cumlo' = scalar(`cumhi')
}
drop random1

* drop the one obs that has both missing
drop if beruf == . & fachgruppe == .

* impute number of children *
* nrcmiss flags the observations whose number of children is filled in here
generate nrcmiss = (nrchildren == .)

* Fathers (frau==0) are handled first, then mothers (frau==1). Within each
* group the weighted shares of the first two categories of nrchildren among
* parents give the cut-points for the draw; draws at or above the second
* cut-point are assigned 3.
tempname cut1 cut2
foreach sex in 0 1 {
	svy: tab nrchildren if frau==`sex' & parent==1
	mat nrc = e(b)
	* the seed is set for the fathers only; the draw for the mothers
	* continues the same random-number stream
	if `sex' == 0 {
		set seed 937
	}
	gen random1 = runiform()
	scalar `cut1' = nrc[1,1]
	scalar `cut2' = nrc[1,1] + nrc[1,2]
	replace nrchildren = 1 if nrchildren ==. & frau==`sex' & parent==1 & random1 >= 0 & random1 < scalar(`cut1')
	replace nrchildren = 2 if nrchildren ==. & frau==`sex' & parent==1 & random1 >= scalar(`cut1') & random1 < scalar(`cut2')
	replace nrchildren = 3 if nrchildren ==. & frau==`sex' & parent==1 & random1 >= scalar(`cut2')
	drop random1
}
	
*** multiple imputation by chained equations ***
* define upper and lower bounds for certain variables *
* the observed minimum and maximum are stored as constants in <var>_l and <var>_u
local bounded score_read score_math score_scie grade_eng grade_math grade_scie parses zigeld autonom
quietly foreach v of local bounded {
	summarize `v'
	generate `v'_l = r(min)
	generate `v'_u = r(max)
}

misstable summarize race inc_year fachgruppe ///
	fa_frau score_read score_math score_scie grade_eng grade_math grade_scie ///
	parses zigeld single married parent arbzt_woch privat ///
	beruf branche autonom jobtrain, all

* declare the mi structure: wide style, variables to impute, complete variables
sort STU_ID
mi set wide
mi register imputed score_read score_math score_scie grade_eng grade_math grade_scie parses zigeld arbzt_woch privat branche autonom jobtrain
mi register regular race inc_year fachgruppe fa_frau single parent married beruf employ frau F4QWT
mi svyset [pw=F4QWT]

set more off
sort STU_ID
* variables imputed by linear regression, by logit, and the common predictors
local linvars score_read score_math score_scie grade_eng grade_math grade_scie parses zigeld arbzt_woch autonom
local binvars privat jobtrain
local preds   race i.fachgruppe fa_frau single married parent
* chained equations, run separately by gender, $impnr imputations
mi impute chained (regress) `linvars' (logit) `binvars' = `preds', ///
	by(frau) replace add($impnr) rseed(633089)
sort STU_ID
* industry is imputed afterwards by multinomial logit into the existing imputations
mi impute mlogit branche `linvars' fa_frau, replace rseed(19587)
				
* censor imputed values *
* For every imputation m and every bounded variable, imputed values (rows in
* which the original variable is missing) that lie outside the stored bounds
* are set to the nearest bound.
quietly forvalues m = 1/$impnr {
	foreach v in score_read score_math score_scie grade_eng grade_math grade_scie parses zigeld autonom {
		local imp _`m'_`v'
		* below the lower bound: use the lower bound
		replace `imp' = `v'_l if `imp' < `v'_l & `v' == .
		* above the upper bound: use the upper bound
		replace `imp' = `v'_u if `imp' > `v'_u & `v' == .
	}
}

* fachgruppe and beruf were filled in by hand above; switch their mi status
* from regular to imputed
mi unregister fachgruppe beruf
mi register imputed fachgruppe beruf

* occupation: generate dummies, register them as imputed, then blank the
* main variable and the dummies in the rows whose occupation was filled in
tabulate beruf, generate(beruf_)
local nocc = r(r)
mi register imputed beruf_*
replace beruf = . if occmiss == 1
forvalues k = 1/`nocc' {
	replace beruf_`k' = . if occmiss == 1
}

* field of study: same steps as for occupation
tabulate fachgruppe, generate(fachgr_)
local nfield = r(r)
mi register imputed fachgr_*
replace fachgruppe = . if fieldmiss == 1
forvalues k = 1/`nfield' {
	replace fachgr_`k' = . if fieldmiss == 1
}

* because mi passive doesnt work with -tab,gen()- the following is necessary *
* the industry dummies of every imputation are rebuilt from that
* imputation's own industry variable
tabulate branche, generate(branche_)
mi register imputed branche_1 - branche_17
forvalues m = 1/$impnr {
	drop _`m'_branche_*
	tabulate _`m'_branche, generate(_`m'_branche_)
}

* children: dummy for having at least one child (nrchildren between 1 and 10)
recode nrchildren (0 = 0) (1/10 = 1), generate(temp2)
tabulate temp2, generate(temp2_)
rename temp2_2 children

* single parents *
generate childrenxsingle = children * single

* interactions with gender *
generate marriedxfrau = married * frau
generate childrenxfrau = children * frau

* The child variables contain hand-imputed values: register them as imputed,
* copy the filled-in variable into every imputation, then blank the main
* variable in the rows that were filled in.
foreach v in nrchildren children childrenxfrau {
	mi register imputed `v'
	forvalues m = 1/$impnr {
		drop _`m'_`v'
		generate _`m'_`v' = `v'
	}
	replace `v' = . if nrcmiss == 1
}

* race dummies, named after the four categories of race
tabulate race, generate(race_)
rename (race_1 race_2 race_3 race_4) (asian latino black white)
	
* keep part time and non-year-round workers:
*   part time      = 10 to under 35 hours per week
*   non-year-round = 35 hours or more, with F4BWKSWK above 0 and below 52
* NOTE(review): a missing arbzt_woch also satisfies >=35 - confirm this is intended
local parttime arbzt_woch >= 10 & arbzt_woch < 35
local partyear arbzt_woch >= 35 & F4BWKSWK > 0 & F4BWKSWK < 52
keep if (`parttime') | (`partyear')

* write the imputed analysis file and close the log
save "Data\NELS_pt_imputed.dta", replace
log close
*** END OF DOFILE ***
